import requests
from lxml import etree
import os
import re
from time import sleep

class Spider(object):
    """Download the chapters of a novel listed on an index page.

    The index page at ``url`` is scanned for chapter links; every chapter
    from the checkpoint ``href`` onwards is fetched and written as a UTF-8
    ``.txt`` file inside ``folder_path``.
    """

    # Seconds to wait on a request before giving up; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Number of leading links on the index page that are ignored.
    # NOTE(review): presumably the index repeats the newest chapters at the
    # top of the list -- confirm against the live page.
    SKIPPED_LINKS = 9

    # Characters stripped from chapter titles before they become file names.
    # ':' and '\\' are included because they are not valid in Windows names.
    _UNSAFE_TITLE_CHARS = re.compile(r'[’"#$%&\'()*+,./:;<=>?\\`{|}~]+')

    def __init__(self, url,
                 href='https://www.biquge5200.cc/0_597/437522.html',
                 folder_path='从前有座灵剑山'):
        """Create the spider and make sure the output folder exists.

        Args:
            url: Address of the index page that lists the chapter links.
            href: Link of the chapter to start (or resume) from. It is
                updated while crawling so a retry continues where the
                previous attempt stopped.
            folder_path: Directory that receives the chapter files.
        """
        self.url = url
        self.href = href
        self.folder_path = folder_path
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.folder_path, exist_ok=True)

    @staticmethod
    def _links_from_checkpoint(links, checkpoint):
        """Return ``links`` from the first ``checkpoint`` entry onwards.

        An empty list is returned when ``checkpoint`` is not in ``links``.
        """
        try:
            start = links.index(checkpoint)
        except ValueError:
            return []
        return links[start:]

    @classmethod
    def _safe_title(cls, title, fallback=''):
        """Strip unsafe characters from ``title``; use ``fallback`` if empty."""
        cleaned = cls._UNSAFE_TITLE_CHARS.sub('', title)
        return cleaned or fallback

    def _crawl_from_checkpoint(self):
        """Fetch the index page and save every chapter from ``self.href`` on."""
        response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        page = etree.HTML(response.text)
        anchors = page.xpath('//div[@id="list"]/dl/dd/a')
        # Read the attribute directly instead of serialising the element and
        # regex-matching it; anchors without an href are skipped.
        links = [a.get('href') for a in anchors[self.SKIPPED_LINKS:]]
        links = [link for link in links if link]

        pending = self._links_from_checkpoint(links, self.href)
        if not pending:
            print('未在目录中找到起始章节：' + self.href)
        for link in pending:
            # Move the checkpoint only once the start chapter was found, and
            # before downloading, so a failure here is retried from this link.
            self.href = link
            self.finally_file(link)

    def next_file(self, max_retries=5):
        """Crawl all remaining chapters, resuming after failures.

        Args:
            max_retries: How many consecutive failures on the same chapter
                are tolerated before the last exception is re-raised.

        Raises:
            Exception: The last error, once the same checkpoint has failed
                more than ``max_retries`` times in a row.
        """
        failures = 0
        while True:
            checkpoint = self.href
            try:
                self._crawl_from_checkpoint()
                return
            except Exception as e:  # top-level boundary: log, then retry
                # Only failures without progress count towards the limit.
                failures = failures + 1 if self.href == checkpoint else 1
                if failures > max_retries:
                    raise
                print(e, '从断点处重爬')
                sleep(1)

    def finally_file(self, Litsrc):
        """Download one chapter page and write its text to a file.

        Args:
            Litsrc: Address of the chapter page.
        """
        response = requests.get(Litsrc, timeout=self.REQUEST_TIMEOUT)
        # The site answers with a "server busy" page under load; poll until
        # the real chapter page is returned.
        while '服务器忙' in response.text:
            sleep(1)
            response = requests.get(Litsrc, timeout=self.REQUEST_TIMEOUT)
        page = etree.HTML(response.text)

        raw_title = ''.join(page.xpath('//div[@class="bookname"]/h1/text()'))
        # Fall back to the page name from the URL so chapters without a
        # usable title do not all overwrite the same ".txt" file.
        page_name = Litsrc.rstrip('/').rsplit('/', 1)[-1]
        Littit = self._safe_title(raw_title, os.path.splitext(page_name)[0])

        content = "\n".join(page.xpath('//div[@id="content"]/p/text()'))
        # os.path.join instead of a hard-coded "\\" keeps the path portable.
        fileName = os.path.join(self.folder_path, Littit + ".txt")
        print("正在保存小说文件：" + fileName)
        with open(fileName, "w", encoding="utf-8") as f:
            f.write(content)

# Index page of the novel to download.
url = 'https://www.biquge5200.cc/0_597/'

# Start crawling only when executed as a script, so that importing this
# module (e.g. to reuse Spider) does not trigger network requests.
if __name__ == "__main__":
    spider = Spider(url)
    spider.next_file()